Column

Distribution of scores by borough

Scores by cuisine of ZIP codes near me

Column

Average score over time by borough, 10/2011 - 10/2017

Restaurants with most critical reports

Cuisines with the most DOHMH-forced closures

---
title: "Food safety in NYC"
output:
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: fill
    source: embed
    navbar:
      - { title: "Home", href: index.html, align: left }
      - { title: "About", href: about.html, align: left } 
      - { title: "Resume", href: resume.html, align: left }
      - { title: "Resume as doc", href: "./Johnstone Tcheou Resume.pdf", align: left }
---

```{r setup, include=FALSE}
library(p8105.datasets)
library(tidyverse)
library(flexdashboard)
library(plotly)
```

```{r import data}
data("rest_inspec")
```

Column {data-width=450}
-----------------------------------------------------------------------


### Distribution of scores by borough 

```{r distribution of scores by borough}
rest_inspec |>
  filter(!(boro == "Missing") & !is.na(score)) |> 
  mutate(
    boro = factor(boro),
    boro = fct_relevel(boro, c("STATEN ISLAND", "BRONX", "QUEENS", "MANHATTAN", "BROOKLYN"))
  ) |> 
  plot_ly(x = ~boro, y = ~score, color = ~boro, type = "violin", colors = "viridis") |> 
  layout(
    title = "NYC DOHMH restaurant sanitary inspection scores by borough",
    yaxis = list(title = "Restaurant sanitation score"),
    xaxis = list(title = "Borough"),
    shapes = list(
      list(type = "line", y0 = 14, y1 = 14, x0 = -0.5, x1 = 4.5, line = list(width = 1, dash = "dash")),
      list(type = "line", y0 = 28, y1 = 28, x0 = -0.5, x1 = 4.5, line = list(width = 1, dash = "dash"))
    )  
  ) |> 
  add_annotations(
    x = -0.5, y = 7, xref = "x", yref = "y", text = "A\n0-13", showarrow = F, font = list(size = 14)
  ) |> 
  add_annotations(
    x = -0.5, y = 21, xref = "x", yref = "y", text = "B\n14-27", showarrow = F, font = list(size = 14)
  ) |> 
  add_annotations(
    x = -0.5, y = 35, xref = "x", yref = "y", text = "C\n28+", showarrow = F, font = list(size = 14)
  )
```

### Scores by cuisine of ZIP codes near me

```{r scores by cuisine of nearby ZIPs}
rest_inspec |> 
  select(cuisine_description, score, zipcode, dba) |> 
  filter(!is.na(cuisine_description) & !is.na(score) & cuisine_description != "Not Listed/Not Applicable" & zipcode %in% c(10010, 10016, 10001)) |> 
  mutate(
    cuisine_description = factor(cuisine_description),
    cuisine_description = fct_reorder(cuisine_description, score),
    cuisine_description = fct_recode(cuisine_description, "Latin" = "Latin (Cuban, Dominican, Puerto Rican, South & Central American)", "Bottled beverages" = "Bottled beverages, including water, sodas, juices, etc.", "Cafe/Coffee/Tea" = "Café/Coffee/Tea"),
    description = str_c("Business: ", dba, "\n Score: ", score, "\nZIP code: ", zipcode)
  ) |> 
  plot_ly(x = ~cuisine_description, y = ~score, color = ~cuisine_description, type = "box", text = ~description) |> 
  layout(
    title = "Scores of restaurants in ZIP codes near me",
    yaxis = list(title = "Restaurant sanitation scores"),
    xaxis = list(title = "Cuisines"),
    shapes = list(
      list(type = "line", y0 = 14, y1 = 14, x0 = 0, x1 = 60, line = list(width = 1, dash = "dash")),
      list(type = "line", y0 = 28, y1 = 28, x0 = 0, x1 = 60, line = list(width = 1, dash = "dash"))
    )
  ) |> 
  add_annotations(
    x = -0.5, y = 6, xref = "x", yref = "y", text = "A\n0-13", showarrow = F
  ) |> 
  add_annotations(
    x = -0.5, y = 21, xref = "x", yref = "y", text = "B\n14-27", showarrow = F
  ) |> 
  add_annotations(
    x = -0.5, y = 33, xref = "x", yref = "y", text = "C\n28+", showarrow = F
  )
```

Column {data-width=450}
-----------------------------------------------------------------------

### Average score over time by borough, 10/2011 - 10/2017

```{r october 2011 to 2017 avg score by borough}
mn_avg_score <- 
  rest_inspec |> 
  select(inspection_date, score, boro) |> 
  filter(!is.na(inspection_date) & year(inspection_date) != 1900 & boro == "MANHATTAN") |>
  group_by(inspection_date) |> 
  summarize(avg_score = mean(score, na.rm = TRUE)) 

bk_avg_score <- 
  rest_inspec |> 
  select(inspection_date, score, boro) |> 
  filter(!is.na(inspection_date) & year(inspection_date) != 1900 & boro == "BROOKLYN") |>
  group_by(inspection_date) |> 
  summarize(avg_score = mean(score, na.rm = TRUE)) 

bx_avg_score <-
  rest_inspec |> 
  select(inspection_date, score, boro) |> 
  filter(!is.na(inspection_date) & year(inspection_date) != 1900 & boro == "BRONX") |>
  group_by(inspection_date) |> 
  summarize(avg_score = mean(score, na.rm = TRUE)) 

qn_avg_score <-
  rest_inspec |> 
  select(inspection_date, score, boro) |> 
  filter(!is.na(inspection_date) & year(inspection_date) != 1900 & boro == "QUEENS") |>
  group_by(inspection_date) |> 
  summarize(avg_score = mean(score, na.rm = TRUE)) 

si_avg_score <- 
  rest_inspec |> 
  select(inspection_date, score, boro) |> 
  filter(!is.na(inspection_date) & year(inspection_date) != 1900 & boro == "STATEN ISLAND") |>
  group_by(inspection_date) |> 
  summarize(avg_score = mean(score, na.rm = TRUE)) 

plot_ly(si_avg_score, x = ~inspection_date, y = ~avg_score, name = "Staten Island", type = "scatter", mode = "lines", connectgaps = TRUE) |> 
  add_trace(data = bk_avg_score, x = ~inspection_date, y = ~avg_score, name = "Brooklyn", type = "scatter", mode = "lines") |> 
  add_trace(data = bx_avg_score, x = ~inspection_date, y = ~avg_score, name = "The Bronx", type = "scatter", mode = "lines") |> 
  add_trace(data = qn_avg_score, x = ~inspection_date, y = ~avg_score, name = "Brooklyn", type = "scatter", mode = "lines") |> 
  add_trace(data = mn_avg_score, x = ~inspection_date, y = ~avg_score, name = "Manhattan", type = "scatter", mode = "lines") |> layout(
    title = "Average NYC DOHMH restaurant sanitary inspection scores, 10/2011-10/2017",
    yaxis = list(title = "Average restaurant sanitation score"),
    xaxis = list(title = "Inspection Date")
  )
```

### Restaurants with most critical reports

```{r restaurants with most critical reports}
critical_restaurants_levels <- 
  rest_inspec |> 
  filter(critical_flag == "Critical" & year(inspection_date) != 1900) |> 
  mutate(
    dba = case_match(
      dba,
      "DUNKIN' DONUTS, BASKIN ROBBINS" ~ "DUNKIN' DONUTS",
      .default = dba
    )
  ) |> 
  group_by(dba) |> 
  summarize(critical_reports = n()) |> 
  arrange(desc(critical_reports)) |>  
  head(n = 100) |> 
  pull(dba) 
  
rest_inspec |> 
  filter(critical_flag == "Critical" & year(inspection_date) != 1900) |> 
  mutate(
    dba = case_match(
      dba,
      "DUNKIN' DONUTS, BASKIN ROBBINS" ~ "DUNKIN' DONUTS",
      .default = dba
    ), 
    year = factor(year(inspection_date))
  ) |> 
  group_by(dba, year) |> 
  summarize(critical_reports = n()) |> 
  arrange(desc(critical_reports), year) |>  
  head(n = 100) |>
  mutate(
    dba = factor(dba),
    dba = fct_relevel(dba, critical_restaurants_levels)
  ) |> 
  plot_ly(x = ~dba, y = ~critical_reports, type = "bar", color = ~year, colors = "viridis") |> 
  layout(
    title = "NYC DOHMH critical violations by restaurant",
    yaxis = list(title = "Critical violations"),
    xaxis = list(title = "Restaurants")
  )
```

### Cuisines with the most DOHMH-forced closures

```{r cuisines with most DOHMH closures}
cuisine_levels <-
  rest_inspec |> 
  mutate(
    cuisine_description = iconv(cuisine_description, from = 'UTF-8', to = 'ISO-8859-1')
  ) |> 
  select(boro, critical_flag, action, dba, cuisine_description, inspection_date) |> 
  filter(str_detect(action, "[Cc]losed")) |>  
  group_by(cuisine_description) |> 
  summarize(closed = n()) |> 
  arrange(desc(closed)) |>  
  pull(cuisine_description)

rest_inspec |> 
  mutate(
    cuisine_description = iconv(cuisine_description, from = 'UTF-8', to = 'ISO-8859-1')
  ) |> 
  select(boro, critical_flag, action, dba, cuisine_description, inspection_date) |> 
  filter(str_detect(action, "[Cc]losed")) |>  
  group_by(boro, cuisine_description) |> 
  summarize(closed = n()) |> 
  arrange(desc(closed)) |> 
  mutate(
    cuisine_description = factor(cuisine_description),
    cuisine_description = fct_relevel(cuisine_description, cuisine_levels),
    cuisine_description = fct_recode(cuisine_description, "Latin" = "Latin (Cuban, Dominican, Puerto Rican, South & Central American)", "Bottled beverages" = "Bottled beverages, including water, sodas, juices, etc.")
  ) |> 
  plot_ly(x = ~cuisine_description, y = ~closed, color = ~boro, type = "bar") |> 
  layout(
    title = "Cuisines with the most restaurants closed by the DOHMH",
    yaxis = list(title = "Number of restaurants closed"),
    xaxis = list(title = "Cuisines")
  )
```